const axios = require('axios');
const robotsParser = require('robots-parser');

/**
 * Checks whether the given URL may be crawled according to the site's
 * robots.txt, evaluated for a generic browser user agent.
 *
 * @param {string} url - Absolute URL of the page to check.
 * @returns {Promise<boolean>} Resolves to `true` when crawling is allowed
 *   (including when robots.txt cannot be fetched), `false` when the
 *   robots.txt explicitly disallows the URL.
 */
async function canCrawl(url) {
  try {
    // Resolve the robots.txt location relative to the target URL's origin.
    const robotsUrl = new URL('/robots.txt', url).href;
    const response = await axios.get(robotsUrl, { timeout: 5000 });

    const robots = robotsParser(robotsUrl, response.data);
    const userAgent = 'Mozilla/5.0';

    // robots-parser's isAllowed() returns `undefined` (not a boolean) when
    // the URL is outside the scope of this robots.txt (e.g. host mismatch).
    // Coerce that case to `true` so the function always returns a boolean,
    // consistent with its permissive-on-failure policy below.
    return robots.isAllowed(url, userAgent) !== false;
  } catch (error) {
    // On any fetch/parse failure, default to allowing the crawl.
    console.warn(`获取robots.txt失败: ${url}`, error.message);
    return true;
  }
}

// Public API of this module.
module.exports = { canCrawl };